In [1]:
import pandas as pd
In [2]:
import os
In [3]:
os.chdir("D:\Dataset_MidTerm")
In [4]:
ec=pd.read_excel("D:\Dataset_MidTerm\Ecommerce_data.xlsx")
In [5]:
ec.head()
Out[5]:
Row ID Order ID Order Date Ship Date Ship Mode Customer ID Customer Name Segment Postal Code City ... Sub-Category Product Name Sales Quantity Discount Profit Shipping Cost Order Priority Unnamed: 24 values
0 40098 CA-2014-AB10015140-41954 2014-11-11 2014-11-13 First Class AB-100151402 Aaron Bergman Consumer 73120.0 Oklahoma City ... Phones Samsung Convoy 3 221.980 2 0.0 62.1544 40.77 High NaN 110.990
1 26341 IN-2014-JR162107-41675 2014-02-05 2014-02-07 Second Class JR-162107 Justin Ritter Corporate NaN Wollongong ... Chairs Novimex Executive Leather Armchair, Black 3709.395 9 0.1 -288.7650 923.63 Critical NaN 412.155
2 25330 IN-2014-CR127307-41929 2014-10-17 2014-10-18 First Class CR-127307 Craig Reiter Consumer NaN Brisbane ... Phones Nokia Smart Phone, with Caller ID 5175.171 9 0.1 919.9710 915.49 Medium NaN 575.019
3 13524 ES-2014-KM1637548-41667 2014-01-28 2014-01-30 First Class KM-1637548 Katherine Murray Home Office NaN Berlin ... Phones Motorola Smart Phone, Cordless 2892.510 5 0.1 -96.5400 910.16 Medium NaN 578.502
4 47221 SG-2014-RH9495111-41948 2014-11-05 2014-11-06 Same Day RH-9495111 Rick Hansen Consumer NaN Dakar ... Copiers Sharp Wireless Fax, High-Speed 2832.960 8 0.0 311.5200 903.04 Critical NaN 354.120

5 rows × 26 columns

In [6]:
ec.tail()
Out[6]:
Row ID Order ID Order Date Ship Date Ship Mode Customer ID Customer Name Segment Postal Code City ... Sub-Category Product Name Sales Quantity Discount Profit Shipping Cost Order Priority Unnamed: 24 values
51285 29002 IN-2015-KE1642066-42174 2015-06-19 2015-06-19 Same Day KE-1642066 Katrina Edelman Corporate NaN Kure ... Fasteners Advantus Thumb Tacks, 12 Pack 65.10 5 0.0 4.5000 1.010 Medium NaN 13.020
51286 34337 US-2014-ZD21925140-41765 2014-05-06 2014-05-10 Standard Class ZD-219251408 Zuschuss Donatelli Consumer 37421.0 Chattanooga ... Furnishings Eldon Image Series Desk Accessories, Burgundy 16.72 5 0.2 3.3440 1.930 High NaN 3.344
51287 31315 CA-2012-ZD21925140-41147 2012-08-26 2012-08-31 Second Class ZD-219251404 Zuschuss Donatelli Consumer 94109.0 San Francisco ... Art Newell 341 8.56 2 0.0 2.4824 1.580 High NaN 4.280
51288 9596 MX-2013-RB1979518-41322 2013-02-17 2013-02-21 Standard Class RB-1979518 Ross Baird Home Office NaN Valinhos ... Binders Acco Index Tab, Economy 13.44 2 0.0 2.4000 1.003 Medium NaN 6.720
51289 6147 MX-2013-MC1810093-41416 2013-05-22 2013-05-26 Second Class MC-1810093 Mick Crebagga Consumer NaN Tipitapa ... Paper Eaton Computer Printout Paper, 8.5 x 11 61.38 3 0.0 1.8000 1.002 High NaN 20.460

5 rows × 26 columns

In [7]:
ec.shape
Out[7]:
(51290, 26)
In [8]:
ec.columns
Out[8]:
Index(['Row ID', 'Order ID', 'Order Date', 'Ship Date', 'Ship Mode',
       'Customer ID', 'Customer Name', 'Segment', 'Postal Code', 'City',
       'State', 'Country', 'Region', 'Market', 'Product ID', 'Category',
       'Sub-Category', 'Product Name', 'Sales', 'Quantity', 'Discount',
       'Profit', 'Shipping Cost', 'Order Priority', 'Unnamed: 24', 'values'],
      dtype='object')

ec1=pd.read_csv("D:\Dataset_MidTerm\Return.csv")

In [9]:
ec1=pd.read_csv("D:\Dataset_MidTerm\Return.csv")
In [10]:
ec1
Out[10]:
Returned Order ID Region
0 Yes CA-2012-SA20830140-41210 Central US
1 Yes IN-2012-PB19210127-41259 Eastern Asia
2 Yes CA-2012-SC20095140-41174 Central US
3 Yes IN-2015-JH158207-42140 Oceania
4 Yes IN-2014-LC168857-41747 Oceania
... ... ... ...
1074 Yes IN-2014-DA1345058-41769 Southern Asia
1075 Yes US-2013-HG14845140-41530 Eastern US
1076 Yes US-2013-SJ2021582-41543 Central America
1077 Yes CA-2015-EB13870140-42269 Eastern US
1078 Yes EG-2014-PM894038-41846 North Africa

1079 rows × 3 columns

In [11]:
ec1.columns
Out[11]:
Index(['Returned', 'Order ID', 'Region'], dtype='object')
In [12]:
ec1.groupby(['Order ID','Region'])['Returned'].sum().sort_values(ascending=True)
Out[12]:
Order ID                  Region         
AE-2014-MY7380138-42004   Western Asia       Yes
IT-2015-ME18010124-42235  Northern Europe    Yes
IT-2015-RB19570120-42181  Southern Europe    Yes
IV-2014-AM70529-41678     Western Africa     Yes
IV-2014-LC688529-41898    Western Africa     Yes
                                            ... 
ES-2015-CW11905139-42288  Northern Europe    Yes
ES-2015-DK1309045-42158   Western Europe     Yes
ES-2015-DK13375139-42082  Northern Europe    Yes
ES-2015-BF11170139-42174  Northern Europe    Yes
ZA-2015-SW10350146-42061  Eastern Africa     Yes
Name: Returned, Length: 1079, dtype: object
In [13]:
ec2=pd.read_excel("D:\Dataset_MidTerm\Ecommerce_data1.xlsx")
In [14]:
ec2
Out[14]:
Row ID Order ID Order Date Ship Date Ship Mode Customer ID Customer Name Segment Postal Code City ... Sub-Category Product Name Sales Quantity Discount Profit Shipping Cost Order Priority Unnamed: 24 values
0 40098 CA-2014-AB10015140-41954 2014-11-11 2014-11-13 First Class AB-100151402 Aaron Bergman Consumer 73120.0 Oklahoma City ... Phones Samsung Convoy 3 221.980 2 0.0 62.1544 40.770 High NaN 110.990
1 26341 IN-2014-JR162107-41675 2014-02-05 2014-02-07 Second Class JR-162107 Justin Ritter Corporate NaN Wollongong ... Chairs Novimex Executive Leather Armchair, Black 3709.395 9 0.1 -288.7650 923.630 Critical NaN 412.155
2 25330 IN-2014-CR127307-41929 2014-10-17 2014-10-18 First Class CR-127307 Craig Reiter Consumer NaN Brisbane ... Phones Nokia Smart Phone, with Caller ID 5175.171 9 0.1 919.9710 915.490 Medium NaN 575.019
3 13524 ES-2014-KM1637548-41667 2014-01-28 2014-01-30 First Class KM-1637548 Katherine Murray Home Office NaN Berlin ... Phones Motorola Smart Phone, Cordless 2892.510 5 0.1 -96.5400 910.160 Medium NaN 578.502
4 47221 SG-2014-RH9495111-41948 2014-11-05 2014-11-06 Same Day RH-9495111 Rick Hansen Consumer NaN Dakar ... Copiers Sharp Wireless Fax, High-Speed 2832.960 8 0.0 311.5200 903.040 Critical NaN 354.120
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
51285 29002 IN-2015-KE1642066-42174 2015-06-19 2015-06-19 Same Day KE-1642066 Katrina Edelman Corporate NaN Kure ... Fasteners Advantus Thumb Tacks, 12 Pack 65.100 5 0.0 4.5000 1.010 Medium NaN 13.020
51286 34337 US-2014-ZD21925140-41765 2014-05-06 2014-05-10 Standard Class ZD-219251408 Zuschuss Donatelli Consumer 37421.0 Chattanooga ... Furnishings Eldon Image Series Desk Accessories, Burgundy 16.720 5 0.2 3.3440 1.930 High NaN 3.344
51287 31315 CA-2012-ZD21925140-41147 2012-08-26 2012-08-31 Second Class ZD-219251404 Zuschuss Donatelli Consumer 94109.0 San Francisco ... Art Newell 341 8.560 2 0.0 2.4824 1.580 High NaN 4.280
51288 9596 MX-2013-RB1979518-41322 2013-02-17 2013-02-21 Standard Class RB-1979518 Ross Baird Home Office NaN Valinhos ... Binders Acco Index Tab, Economy 13.440 2 0.0 2.4000 1.003 Medium NaN 6.720
51289 6147 MX-2013-MC1810093-41416 2013-05-22 2013-05-26 Second Class MC-1810093 Mick Crebagga Consumer NaN Tipitapa ... Paper Eaton Computer Printout Paper, 8.5 x 11 61.380 3 0.0 1.8000 1.002 High NaN 20.460

51290 rows × 26 columns

In [15]:
ec3 = pd.ExcelFile("D:\Dataset_MidTerm\Ecommerce_data.xlsx")
In [16]:
ec3
Out[16]:
<pandas.io.excel._base.ExcelFile at 0x131c9ebbf90>
In [17]:
print(ec3.sheet_names)
['Orders', 'Returns', 'People']
In [18]:
o_df = pd.read_excel(ec3, 'Orders')
r_df = pd.read_excel(ec3, 'Returns')

Details of the sheets¶

In [19]:
o_df.head(), r_df.head()
Out[19]:
(   Row ID                  Order ID Order Date  Ship Date     Ship Mode  \
 0   40098  CA-2014-AB10015140-41954 2014-11-11 2014-11-13   First Class   
 1   26341    IN-2014-JR162107-41675 2014-02-05 2014-02-07  Second Class   
 2   25330    IN-2014-CR127307-41929 2014-10-17 2014-10-18   First Class   
 3   13524   ES-2014-KM1637548-41667 2014-01-28 2014-01-30   First Class   
 4   47221   SG-2014-RH9495111-41948 2014-11-05 2014-11-06      Same Day   
 
     Customer ID     Customer Name      Segment  Postal Code           City  \
 0  AB-100151402     Aaron Bergman     Consumer      73120.0  Oklahoma City   
 1     JR-162107     Justin Ritter    Corporate          NaN     Wollongong   
 2     CR-127307      Craig Reiter     Consumer          NaN       Brisbane   
 3    KM-1637548  Katherine Murray  Home Office          NaN         Berlin   
 4    RH-9495111       Rick Hansen     Consumer          NaN          Dakar   
 
    ... Sub-Category                               Product Name     Sales  \
 0  ...       Phones                           Samsung Convoy 3   221.980   
 1  ...       Chairs  Novimex Executive Leather Armchair, Black  3709.395   
 2  ...       Phones          Nokia Smart Phone, with Caller ID  5175.171   
 3  ...       Phones             Motorola Smart Phone, Cordless  2892.510   
 4  ...      Copiers             Sharp Wireless Fax, High-Speed  2832.960   
 
   Quantity Discount    Profit Shipping Cost Order Priority  Unnamed: 24  \
 0        2      0.0   62.1544         40.77           High          NaN   
 1        9      0.1 -288.7650        923.63       Critical          NaN   
 2        9      0.1  919.9710        915.49         Medium          NaN   
 3        5      0.1  -96.5400        910.16         Medium          NaN   
 4        8      0.0  311.5200        903.04       Critical          NaN   
 
     values  
 0  110.990  
 1  412.155  
 2  575.019  
 3  578.502  
 4  354.120  
 
 [5 rows x 26 columns],
   Returned                  Order ID        Region
 0      Yes  CA-2012-SA20830140-41210    Central US
 1      Yes  IN-2012-PB19210127-41259  Eastern Asia
 2      Yes  CA-2012-SC20095140-41174    Central US
 3      Yes    IN-2015-JH158207-42140       Oceania
 4      Yes    IN-2014-LC168857-41747       Oceania)
In [20]:
concat_df = pd.merge(o_df, r_df, on='Order ID')# Order id is a primary key
concat_df
Out[20]:
Row ID Order ID Order Date Ship Date Ship Mode Customer ID Customer Name Segment Postal Code City ... Sales Quantity Discount Profit Shipping Cost Order Priority Unnamed: 24 values Returned Region_y
0 30191 IN-2012-PB19210127-41259 2012-12-16 2012-12-19 First Class PB-19210127 Phillip Breyer Corporate NaN Taipei ... 1715.160 2 0.0 720.3600 725.570 Critical NaN 857.580 Yes Eastern Asia
1 30190 IN-2012-PB19210127-41259 2012-12-16 2012-12-19 First Class PB-19210127 Phillip Breyer Corporate NaN Taipei ... 2197.500 5 0.0 153.7500 627.270 Critical NaN 439.500 Yes Eastern Asia
2 30187 IN-2012-PB19210127-41259 2012-12-16 2012-12-19 First Class PB-19210127 Phillip Breyer Corporate NaN Taipei ... 1356.030 3 0.0 311.8500 458.970 Critical NaN 452.010 Yes Eastern Asia
3 30193 IN-2012-PB19210127-41259 2012-12-16 2012-12-19 First Class PB-19210127 Phillip Breyer Corporate NaN Taipei ... 882.150 5 0.0 114.6000 203.920 Critical NaN 176.430 Yes Eastern Asia
4 30189 IN-2012-PB19210127-41259 2012-12-16 2012-12-19 First Class PB-19210127 Phillip Breyer Corporate NaN Taipei ... 148.320 3 0.0 68.2200 25.480 Critical NaN 49.440 Yes Eastern Asia
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2215 35040 CA-2014-WB21850140-41654 2014-01-15 2014-01-21 Standard Class WB-218501406 William Brown Consumer 10011.0 New York City ... 49.536 3 0.2 17.3376 2.670 Medium NaN 16.512 Yes Eastern US
2216 35038 CA-2014-WB21850140-41654 2014-01-15 2014-01-21 Standard Class WB-218501406 William Brown Consumer 10011.0 New York City ... 16.520 4 0.0 7.5992 1.690 Medium NaN 4.130 Yes Eastern US
2217 25 US-2013-SJ2021582-41543 2013-09-26 2013-09-29 First Class SJ-2021582 Sarah Jordon Consumer NaN Hermosillo ... 181.116 9 0.4 -75.5640 1.085 Medium NaN 20.124 Yes Central America
2218 36974 CA-2012-ZC21910140-41271 2012-12-28 2013-01-04 Standard Class ZC-219101402 Zuschuss Carroll Consumer 60610.0 Chicago ... 38.976 3 0.6 -50.6688 5.290 Low NaN 12.992 Yes Central US
2219 41315 EG-2014-PM894038-41846 2014-07-26 2014-07-30 Standard Class PM-894038 Paul MacIntyre Consumer NaN Alexandria ... 25.830 1 0.0 9.0300 1.050 Medium NaN 25.830 Yes North Africa

2220 rows × 28 columns

In [21]:
concat_df.isnull().any().sum()
Out[21]:
2

Exploratory data analysis¶

In [22]:
ec3 = concat_df.dropna(axis=1)
In [23]:
ec3.shape
Out[23]:
(2220, 26)
In [24]:
ec3.columns
Out[24]:
Index(['Row ID', 'Order ID', 'Order Date', 'Ship Date', 'Ship Mode',
       'Customer ID', 'Customer Name', 'Segment', 'City', 'State', 'Country',
       'Region_x', 'Market', 'Product ID', 'Category', 'Sub-Category',
       'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit',
       'Shipping Cost', 'Order Priority', 'values', 'Returned', 'Region_y'],
      dtype='object')
In [25]:
list(ec3)
Out[25]:
['Row ID',
 'Order ID',
 'Order Date',
 'Ship Date',
 'Ship Mode',
 'Customer ID',
 'Customer Name',
 'Segment',
 'City',
 'State',
 'Country',
 'Region_x',
 'Market',
 'Product ID',
 'Category',
 'Sub-Category',
 'Product Name',
 'Sales',
 'Quantity',
 'Discount',
 'Profit',
 'Shipping Cost',
 'Order Priority',
 'values',
 'Returned',
 'Region_y']

1.Total value of returns¶

In [26]:
total_returned_value = ec3['Sales'].sum()
print(' Total Returned Value is',total_returned_value)
 Total Returned Value is 525932.22736
In [27]:
total_returned_value = ec3['values'].sum()
print('Total Returned Value is',total_returned_value)
Total Returned Value is 157010.21778

2.Total returned value by region¶

In [28]:
returned_value_by_region = ec3.groupby(['Region_y'])[['Sales','values']].sum()
returned_value_by_region
Out[28]:
Sales values
Region_y
Caribbean 9679.32828 2969.33504
Central Africa 4038.48000 1756.05000
Central America 44334.79916 13078.52248
Central Asia 359.03100 146.19600
Central US 23306.14500 5002.83300
Eastern Africa 2026.44000 1214.04000
Eastern Asia 47798.04300 13874.01300
Eastern Canada 2639.91000 654.06000
Eastern Europe 9501.21000 4476.66000
Eastern US 23477.32600 6992.91000
North Africa 10193.37000 5644.29000
Northern Europe 17768.64000 5526.58200
Oceania 46943.33100 12685.42200
South America 29845.11212 9224.02916
Southeastern Asia 36967.09830 11068.52910
Southern Africa 7321.86000 3246.45000
Southern Asia 25159.36500 8395.40700
Southern Europe 34226.34300 7325.53500
Southern US 17024.59700 4209.27650
Western Africa 3928.15800 2623.65600
Western Asia 11737.63800 5182.62600
Western Canada 676.50000 638.37000
Western Europe 72669.48450 17705.63850
Western US 44310.01800 13369.78700
In [43]:
import random
import seaborn as sns
In [46]:
sns.lmplot(x='Quantity',y='Sales',hue='values',data=ec3)
C:\Users\devar\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight
  self._figure.tight_layout(*args, **kwargs)
Out[46]:
<seaborn.axisgrid.FacetGrid at 0x131c6adc810>
In [29]:
ec3['Unit Price'] = ec3['Sales'] / ec3['Quantity']
ec3
C:\Users\devar\AppData\Local\Temp\ipykernel_15912\3027465675.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ec3['Unit Price'] = ec3['Sales'] / ec3['Quantity']
Out[29]:
Row ID Order ID Order Date Ship Date Ship Mode Customer ID Customer Name Segment City State ... Sales Quantity Discount Profit Shipping Cost Order Priority values Returned Region_y Unit Price
0 30191 IN-2012-PB19210127-41259 2012-12-16 2012-12-19 First Class PB-19210127 Phillip Breyer Corporate Taipei Taipei City ... 1715.160 2 0.0 720.3600 725.570 Critical 857.580 Yes Eastern Asia 857.580
1 30190 IN-2012-PB19210127-41259 2012-12-16 2012-12-19 First Class PB-19210127 Phillip Breyer Corporate Taipei Taipei City ... 2197.500 5 0.0 153.7500 627.270 Critical 439.500 Yes Eastern Asia 439.500
2 30187 IN-2012-PB19210127-41259 2012-12-16 2012-12-19 First Class PB-19210127 Phillip Breyer Corporate Taipei Taipei City ... 1356.030 3 0.0 311.8500 458.970 Critical 452.010 Yes Eastern Asia 452.010
3 30193 IN-2012-PB19210127-41259 2012-12-16 2012-12-19 First Class PB-19210127 Phillip Breyer Corporate Taipei Taipei City ... 882.150 5 0.0 114.6000 203.920 Critical 176.430 Yes Eastern Asia 176.430
4 30189 IN-2012-PB19210127-41259 2012-12-16 2012-12-19 First Class PB-19210127 Phillip Breyer Corporate Taipei Taipei City ... 148.320 3 0.0 68.2200 25.480 Critical 49.440 Yes Eastern Asia 49.440
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2215 35040 CA-2014-WB21850140-41654 2014-01-15 2014-01-21 Standard Class WB-218501406 William Brown Consumer New York City New York ... 49.536 3 0.2 17.3376 2.670 Medium 16.512 Yes Eastern US 16.512
2216 35038 CA-2014-WB21850140-41654 2014-01-15 2014-01-21 Standard Class WB-218501406 William Brown Consumer New York City New York ... 16.520 4 0.0 7.5992 1.690 Medium 4.130 Yes Eastern US 4.130
2217 25 US-2013-SJ2021582-41543 2013-09-26 2013-09-29 First Class SJ-2021582 Sarah Jordon Consumer Hermosillo Sonora ... 181.116 9 0.4 -75.5640 1.085 Medium 20.124 Yes Central America 20.124
2218 36974 CA-2012-ZC21910140-41271 2012-12-28 2013-01-04 Standard Class ZC-219101402 Zuschuss Carroll Consumer Chicago Illinois ... 38.976 3 0.6 -50.6688 5.290 Low 12.992 Yes Central US 12.992
2219 41315 EG-2014-PM894038-41846 2014-07-26 2014-07-30 Standard Class PM-894038 Paul MacIntyre Consumer Alexandria Al Iskandariyah ... 25.830 1 0.0 9.0300 1.050 Medium 25.830 Yes North Africa 25.830

2220 rows × 27 columns

3.Total returned value for category and sub-category using product id¶

In [30]:
returned_value = concat_df.groupby(['Category', 'Sub-Category'])[['Product ID']].sum()
returned_value
Out[30]:
Product ID
Category Sub-Category
Furniture Bookcases FUR-BO-5762FUR-BO-3624FUR-BO-4848FUR-BO-5788FU...
Chairs FUR-CH-5454FUR-CH-5378FUR-CH-5378FUR-CH-4397FU...
Furnishings FUR-FU-4086FUR-FU-3928FUR-FU-3028FUR-FU-4086FU...
Tables FUR-TA-5060FUR-TA-5065FUR-TA-3766FUR-TA-3780FU...
Office Supplies Appliances OFF-AP-4961OFF-AP-4958OFF-AP-3870OFF-AP-3575OF...
Art OFF-AR-5920OFF-AR-6124OFF-AR-5930OFF-AR-3494OF...
Binders OFF-BI-2897OFF-BI-3723OFF-BI-4809OFF-BI-4807OF...
Envelopes OFF-EN-3097OFF-EN-4914OFF-EN-5040OFF-EN-5023OF...
Fasteners OFF-FA-2943OFF-FA-6207OFF-FA-6204OFF-FA-3064OF...
Labels OFF-LA-4552OFF-LA-6064OFF-LA-3312OFF-LA-4541OF...
Paper OFF-PA-4004OFF-PA-6614OFF-PA-4470OFF-PA-6577OF...
Storage OFF-ST-4516OFF-ST-6263OFF-ST-4057OFF-ST-6031OF...
Supplies OFF-SU-6182OFF-SU-4319OFF-SU-4120OFF-SU-4317OF...
Technology Accessories TEC-AC-4164TEC-AC-4185TEC-AC-4178TEC-AC-5121TE...
Copiers TEC-CO-6012TEC-CO-5991TEC-CO-4593TEC-CO-6001TE...
Machines TEC-MA-5494TEC-MA-6150TEC-MA-6138TEC-MA-5546TE...
Phones TEC-PH-3147TEC-PH-5815TEC-PH-3148TEC-PH-3805TE...

4.What is total number of products returned whose unit price is more than 100dollars?¶

In [31]:
ec5 = ec3[ec3['values'] > 100]
total_returned_products = ec5['Quantity'].sum()
print('The Total Returned Products is', total_returned_products)
The Total Returned Products is 1607

5.How many of the orders deliverd on the same day, 7days, 15days and >30 days?¶

In [32]:
ec3['delivery_days'] = (ec3['Ship Date'] - ec3['Order Date']).dt.days
ec
Out[32]:
Row ID Order ID Order Date Ship Date Ship Mode Customer ID Customer Name Segment Postal Code City ... Sub-Category Product Name Sales Quantity Discount Profit Shipping Cost Order Priority Unnamed: 24 values
0 40098 CA-2014-AB10015140-41954 2014-11-11 2014-11-13 First Class AB-100151402 Aaron Bergman Consumer 73120.0 Oklahoma City ... Phones Samsung Convoy 3 221.980 2 0.0 62.1544 40.770 High NaN 110.990
1 26341 IN-2014-JR162107-41675 2014-02-05 2014-02-07 Second Class JR-162107 Justin Ritter Corporate NaN Wollongong ... Chairs Novimex Executive Leather Armchair, Black 3709.395 9 0.1 -288.7650 923.630 Critical NaN 412.155
2 25330 IN-2014-CR127307-41929 2014-10-17 2014-10-18 First Class CR-127307 Craig Reiter Consumer NaN Brisbane ... Phones Nokia Smart Phone, with Caller ID 5175.171 9 0.1 919.9710 915.490 Medium NaN 575.019
3 13524 ES-2014-KM1637548-41667 2014-01-28 2014-01-30 First Class KM-1637548 Katherine Murray Home Office NaN Berlin ... Phones Motorola Smart Phone, Cordless 2892.510 5 0.1 -96.5400 910.160 Medium NaN 578.502
4 47221 SG-2014-RH9495111-41948 2014-11-05 2014-11-06 Same Day RH-9495111 Rick Hansen Consumer NaN Dakar ... Copiers Sharp Wireless Fax, High-Speed 2832.960 8 0.0 311.5200 903.040 Critical NaN 354.120
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
51285 29002 IN-2015-KE1642066-42174 2015-06-19 2015-06-19 Same Day KE-1642066 Katrina Edelman Corporate NaN Kure ... Fasteners Advantus Thumb Tacks, 12 Pack 65.100 5 0.0 4.5000 1.010 Medium NaN 13.020
51286 34337 US-2014-ZD21925140-41765 2014-05-06 2014-05-10 Standard Class ZD-219251408 Zuschuss Donatelli Consumer 37421.0 Chattanooga ... Furnishings Eldon Image Series Desk Accessories, Burgundy 16.720 5 0.2 3.3440 1.930 High NaN 3.344
51287 31315 CA-2012-ZD21925140-41147 2012-08-26 2012-08-31 Second Class ZD-219251404 Zuschuss Donatelli Consumer 94109.0 San Francisco ... Art Newell 341 8.560 2 0.0 2.4824 1.580 High NaN 4.280
51288 9596 MX-2013-RB1979518-41322 2013-02-17 2013-02-21 Standard Class RB-1979518 Ross Baird Home Office NaN Valinhos ... Binders Acco Index Tab, Economy 13.440 2 0.0 2.4000 1.003 Medium NaN 6.720
51289 6147 MX-2013-MC1810093-41416 2013-05-22 2013-05-26 Second Class MC-1810093 Mick Crebagga Consumer NaN Tipitapa ... Paper Eaton Computer Printout Paper, 8.5 x 11 61.380 3 0.0 1.8000 1.002 High NaN 20.460

51290 rows × 26 columns

In [33]:
same_day = ec3[ec3['delivery_days'] == 0].shape[0]
within_7_days = ec3[(ec3['delivery_days'] > 0) & (ec3['delivery_days'] <= 7)].shape[0]
within_15_days = ec3[(ec3['delivery_days'] > 7) & (ec3['delivery_days'] <= 15)].shape[0]
more_than_30_days = ec3[ec3['delivery_days'] > 30].shape[0]
In [34]:
print(f"Same Day: {same_day}")
print(f"Within 7 Days: {within_7_days}")
print(f"Within 15 Days: {within_15_days}")
print(f"More than 30 Days: {more_than_30_days}")
Same Day: 115
Within 7 Days: 2105
Within 15 Days: 0
More than 30 Days: 0

6.Check the relationship between price and quantity?¶

In [35]:
from scipy.stats import pearsonr
pearsonr(ec3['values'],ec3['Quantity'])
Out[35]:
PearsonRResult(statistic=-0.028330344451430955, pvalue=0.18208808203216745)

There is no relationship between the price and quantity because the r value is -0.02¶

7. Use the calculation from question 5 and order priority. Check is there any relationship between order priority and days taken.¶

In [36]:
order_priority = ec3.groupby('Order Priority')['delivery_days'].describe()
print(order_priority)
                 count      mean       std  min  25%  50%  75%  max
Order Priority                                                     
Critical         188.0  1.750000  1.135829  0.0  1.0  2.0  3.0  3.0
High             643.0  3.205288  1.453807  0.0  2.0  4.0  4.0  5.0
Low              125.0  6.552000  0.499290  6.0  6.0  7.0  7.0  7.0
Medium          1264.0  4.528481  1.393053  0.0  4.0  5.0  5.0  7.0
In [37]:
from scipy import stats
In [38]:
anova_result = stats.f_oneway(
    ec3[ec3['Order Priority'] == 'Low']['delivery_days'],
    ec3[ec3['Order Priority'] == 'Medium']['delivery_days'],
    ec3[ec3['Order Priority'] == 'High']['delivery_days'],
    ec3[ec3['Order Priority'] == 'Critical']['delivery_days'])

print(f"ANOVA result: {anova_result}")
ANOVA result: F_onewayResult(statistic=456.90010105295534, pvalue=4.523164187330846e-231)
In [ ]: